;;;; A variable-order word monkey (Markov chain of words).

(ns monkey (:gen-class)
    (:use [clojure.contrib.duck-streams :only (reader)])
    (:use [clojure.contrib.str-utils2 :only (split blank?)])
    (:use [clojure.contrib.command-line :only (with-command-line)]))

(def *maxgen* 1000)	; default maximum number of items (words or characters) to generate
(def *prefix-len* 2)	; default prefix length: how many preceding items form a lookup key

(defn read-words
  "Reads `file` and returns a sequence of its whitespace-separated words,
  in order, with blank tokens removed.

  The lines are walked with `line-seq` rather than by explicit recursion, so
  stack usage does not grow with the number of lines in the file. The result
  is fully realized with `doall` before `with-open` closes the reader."
  [file]
  (with-open [rdr (reader file)]
    (doall
     (remove blank?
             (mapcat (fn [line] (split line #"[\s]+"))
                     (line-seq rdr))))))

;; Records one observation in the chain: every element of `prefix-plus`
;; except the last forms the key (as a vector), and the last element is
;; conj'd onto the collection of suffixes already stored under that key.
(defn add-map [markov-map prefix-plus]
  (let [suffix   (last prefix-plus)
        k        (vec (butlast prefix-plus))
        existing (markov-map k)]
    (assoc markov-map k (conj existing suffix))))

;; Builds the prefix -> suffixes map. `col` is a zero-argument function
;; returning the items to learn from. The items are padded with `prefix-len`
;; leading nils and one trailing nil, then every sliding window of
;; (inc prefix-len) consecutive items is fed to add-map.
(defn build-map [prefix-len col]
  (let [padding (repeat prefix-len nil)
        padded  (concat padding (col) [nil])
        windows (partition (inc prefix-len) 1 padded)]
    (reduce add-map {} windows)))

;; Lazily produces items by repeatedly choosing a random suffix for the
;; current prefix and sliding the prefix forward by one. Returns nil (ending
;; the sequence) as soon as the chosen suffix is nil or false.
(defn generate [markov-map prefix]
  (let [candidates (markov-map (vec prefix))
        pick       (rand-int (count candidates))
        suffix     (nth candidates pick)]
    (when suffix
      (let [next-prefix (conj (vec (rest prefix)) suffix)]
        (lazy-cat [suffix] (generate markov-map next-prefix))))))

(defn word-monkey
  "Builds a word-level chain from `file` and prints generated words, one per
  line, until the chain ends or `maxgen` words have been printed.

  `prefix-len` defaults to *prefix-len* and `maxgen` to *maxgen*.
  Returns nil.

  The output is capped with `take` and printed with `doseq`, so a `maxgen`
  of zero or less prints nothing."
  ([file] (word-monkey file *prefix-len* *maxgen*))
  ([file prefix-len] (word-monkey file prefix-len *maxgen*))
  ([file prefix-len maxgen]
     (let [markov-map (build-map prefix-len (fn [] (read-words file)))
           start      (repeat prefix-len nil)]
       (doseq [word (take maxgen (generate markov-map start))]
         (println word)))))

(defn character-monkey
  "Builds a character-level chain from the contents of `file` and prints
  generated characters, with no separator, until the chain ends or `maxgen`
  characters have been printed.

  `prefix-len` defaults to *prefix-len* and `maxgen` to *maxgen*.
  Returns nil.

  The output is capped with `take` and printed with `doseq`, so a `maxgen`
  of zero or less prints nothing."
  ([file] (character-monkey file *prefix-len* *maxgen*))
  ([file prefix-len] (character-monkey file prefix-len *maxgen*))
  ([file prefix-len maxgen]
     (let [markov-map (build-map prefix-len (fn [] (slurp file)))
           start      (repeat prefix-len nil)]
       (doseq [ch (take maxgen (generate markov-map start))]
         (print ch)))))

;; Command-line entry point. Accepts an optional -c/--char-monkey flag and
;; one to three positional arguments: the file, then optionally the prefix
;; length and the maximum output count (both parsed as integers). Any other
;; argument count prints a usage line to *err*. Output is flushed at the end.
(defn -main [& arguments]
  (with-command-line arguments
      "Run a character or word monkey."
      [[char-monkey? c? "Run a character monkey instead of a word monkey."]
       args]
    (let [argc (count args)]
      (if (and (pos? argc) (< argc 4))
        (let [[file & numbers] args
              monkey (if char-monkey? character-monkey word-monkey)]
          (apply monkey file (map #(Integer/parseInt %) numbers)))
        (binding [*out* *err*]
          (println "Usage: monkey [-c|--char-monkey] file [prefix-len [maxout]]")))))
  (flush))

